import os
import pandas as pd
import geopandas as gpd
import rasterio
import rasterio.plot
import rasterstats
from rasterstats import zonal_stats
import matplotlib.pyplot as plt
from pathlib import Path
from IPython.display import display
import plotly.express as px
import plotly.offline
plotly.offline.init_notebook_mode()
print('All libraries successfully imported!')
print(f'Rasterstats : {rasterstats.__version__}')
All libraries successfully imported! Rasterstats : 0.14.0
computer_path = '/export/miro/ndeffense/LBRAT2104/'
grp_letter = 'X'
lut_path = f'{computer_path}data/LUT/'
# Directory for all work files
work_path = f'{computer_path}GROUP_{grp_letter}/WORK/'
zonal_stat_path = f'{work_path}ZONAL_STATS/'
Path(zonal_stat_path).mkdir(parents=True, exist_ok=True)
print(f'Zonal Statistics path is set to : {zonal_stat_path}')
Zonal Statistics path is set to : /export/miro/ndeffense/LBRAT2104/GROUP_X/WORK/ZONAL_STATS/
site = 'NAMUR'
year = '2020'
nodata_val = -10000
vector_field = 'name'
# Look-up-table file
code_field = 'sub_nb'
name_field = 'sub'
lut_file = f'{lut_path}crop_dictionary_new.xlsx'
raster_file = f'{work_path}CLASSIF/{site}_{year}_CLASSIF_RF_with_NDVI.tif'
vector_file = f'{work_path}ROI/extent_roi_32631.shp'
zonal_stat_csv = f'{zonal_stat_path}zonal_stat_cat.csv'
zonal_stat_html = f'/export/miro/ndeffense/LBRAT2104/GIT/eo-toolbox/notebooks/A_Zonal_Statistics/figures/{site}_{year}_Zonal_Stat.html'
computer_path = '/export/projects/Sen4Stat/'
# Directory for all work files
work_path = f'{computer_path}WORK/'
# Raster file
raster_file = f'{work_path}CLASSIF/SITE_41/06-01_11-15/distMax_0/RF_OpenCV/SAMPLING_DESIGN_V25/SEN_2021_SITE_41_buf_0_LC_all_EXTENT_wall_to_wall_SEG_RATIO_100_LEVEL_grp_1_SD_25_FEAT_2_CLASSIF_RF_OpenCV_v1_reclassify_grp_A_nb.tif'
# Vector file
vector_file = f'{work_path}SITE_EXTENT/SITE_41/SITE_41_ALL_4326.shp'
vector_field = 'NAME_2'
# Look-up-table file
lut_file = f'{work_path}LUT/crop_dictionary_new.xlsx'
code_field = 'grp_A_nb'
name_field = 'grp_A'
# Zonal statistics output
zonal_stat_path = f'{work_path}ZONAL_STATS/'
zonal_stat_csv = f'{zonal_stat_path}zonal_stat_grp_A_feat_2.csv'
gdf = gpd.read_file(vector_file)
src = rasterio.open(raster_file, "r")
crs_vector = str(gdf.crs).split(":",1)[1]
crs_raster = str(src.crs).split(":",1)[1]
if crs_vector == crs_raster:
print(f'CRS are the same : EPSG:{crs_vector} = EPSG:{crs_raster}')
else:
print('We must reproject vector file')
gdf = gdf.to_crs(epsg=crs_raster)
CRS are the same : EPSG:32631 = EPSG:32631
Check if raster and vector file are intersecting
fig, ax = plt.subplots(1, 1, figsize=(10,10))
# Plot vector
gdf.plot(facecolor='none', edgecolor='red', linewidth = 4, ax=ax)
# Plot image
rasterio.plot.show(src, ax=ax)
plt.box(False)
rasterstats will report using the pixel values as keys. To associate the pixel values with their appropriate meaning, you can use a category_map
if os.path.isfile(lut_file):
lut_df = pd.read_excel(lut_file)
lut_df = lut_df[[code_field,name_field]].drop_duplicates()
cmap = {}
for index, row in lut_df.iterrows():
nb = row[code_field]
name = row[name_field]
cmap[nb] = f'{nb} - {name}'
else:
cmap = None
print(cmap)
{0: '0 - Unknown', 1111: '1111 - Winter wheat', 1112: '1112 - Spring wheat', 1113: '1113 - Hard wheat', 1114: '1114 - Soft wheat', 1115: '1115 - Triticale', 1121: '1121 - Maize', 1131: '1131 - Rice', 1141: '1141 - Sorghum', 1151: '1151 - Barley two-row', 1152: '1152 - Barley six-row', 1161: '1161 - Rye', 1171: '1171 - Oats', 1181: '1181 - Millets', 1191: '1191 - Mixed cereals', 1192: '1192 - Other cereals', 1193: '1193 - Quinoa', 1211: '1211 - Artichokes', 1212: '1212 - Asparagus', 1213: '1213 - Cabbages', 1214: '1214 - Cauliflowers & brocoli', 1215: '1215 - Lettuce', 1216: '1216 - Spinach', 1217: '1217 - Chicory', 1218: '1218 - Celery', 1219: '1219 - Other leafy or stem vegetables', 1221: '1221 - Cucumbers', 1222: '1222 - Eggplants (aubergines)', 1223: '1223 - Tomatoes', 1224: '1224 - Watermelons', 1225: '1225 - Cantaloupes and other melons', 1226: '1226 - Pumpkin, squash and gourds', 1227: '1227 - Strawberries', 1229: '1229 - Other fruit-bearing vegetables', 1231: '1231 - Carrots', 1232: '1232 - Turnips', 1233: '1233 - Garlic', 1234: '1234 - Onions (incl. shallots)', 1235: '1235 - Leeks & other alliaceous vegetables', 1236: '1236 - Beetroots', 1239: '1239 - Other root, buld or tuberous vegetables', 1241: '1241 - Mushrooms', 1291: '1291 - Other vegetables', 1411: '1411 - Soya beans', 1421: '1421 - Groundnuts', 1431: '1431 - Castor bean', 1432: '1432 - Linseed', 1433: '1433 - Mustard', 1434: '1434 - Niger seed', 1435: '1435 - Rapeseed', 1436: '1436 - Safflower', 1437: '1437 - Sesame', 1438: '1438 - Sunflower', 1439: '1439 - Other oilseed crops', 1511: '1511 - Potatoes', 1521: '1521 - Sweet potatoes', 1531: '1531 - Cassava', 1541: '1541 - Yams', 1591: '1591 - Other root/tuber crops', 1611: '1611 - Chilies and pepers', 1612: '1612 - Anise, badian and fennel', 1613: '1613 - Other spice crops', 1621: '1621 - Hops', 1711: '1711 - Beans', 1721: '1721 - Broad beans', 1731: '1731 - Chickpeas', 1741: '1741 - Cow peas', 1751: '1751 - Lentils', 1761: '1761 - Lupins', 1771: '1771 - Peas', 1781: '1781 - Pigeon peas', 1791: '1791 - Other leguminous crops', 1811: '1811 - Sugar beet', 1821: '1821 - Sugar cane', 1831: '1831 - Sweet sorghum', 1891: '1891 - Other sugar crops', 1911: '1911 - Alfalfa', 1912: '1912 - Vetches', 1919: '1919 - Grasses and other fodder crops', 1921: '1921 - Cotton', 1922: '1922 - Jute, kenaf and other similar crops', 1923: '1923 - Flax, hemp and other similar crops', 1929: '1929 - Other fibre crops', 1931: '1931 - Medicinal, aromatic, pesticidal or similar crops', 1941: '1941 - Flowers crops', 1991: '1991 - Tobacco', 1992: '1992 - Crop combinations', 1999: '1999 - Other annual crops', 2111: '2111 - No citrus fruits trees', 2112: '2112 - No citrus fruits shrub', 2121: '2121 - Orange', 2122: '2122 - Mandarin', 2123: '2123 - Lemon', 2124: '2124 - Bitter orange', 2125: '2125 - Grapefruit', 2129: '2129 - Other citrus fruits trees', 2131: '2131 - Pineapple', 2132: '2132 - Avocado', 2133: '2133 - Banana', 2134: '2134 - Cacao', 2136: '2136 - Coffee', 2137: '2137 - Tea', 2139: '2139 - Other exotic fruits tree', 2211: '2211 - Vineyards', 2311: '2311 - Olives groves', 2411: '2411 - Poplar', 2412: '2412 - Pawlonia', 2413: '2413 - Holm oaks', 2414: '2414 - Tree nurseries', 2415: '2415 - Carob trees', 2416: '2416 - Other woody crops', 2419: '2419 - Other woody crops (caper, wicker, mulberry, etc.)', 2911: '2911 - Aloe vera', 2912: '2912 - Agave', 2999: '2999 - Other perennial crops', 3111: '3111 - Natural meadows (mown once a year)', 3112: '3112 - High mountain pasture', 3113: '3113 - Grassland', 3199: '3199 - Grassland and meadows', 4111: '4111 - Fallows 1 year', 4112: '4112 - Fallows 2 years', 4113: '4113 - Fallows 3 years', 4114: '4114 - Fallows 4 years', 4115: '4115 - Fallows >5 years', 5111: '5111 - Shrub land', 6111: '6111 - Conifers', 6211: '6211 - Deciduous slow growth', 6221: '6221 - Deciduous rapid growth', 6311: '6311 - Conifers and deciduous', 6999: '6999 - Forest', 7111: '7111 - Sparsely vegetated', 7211: '7211 - Bare soils', 8111: '8111 - Urban', 8211: '8211 - Industrial and commercial', 8311: '8311 - Transport', 8411: '8411 - Greenhouses', 8511: '8511 - Other build-up surface', 9111: '9111 - Seas', 9121: '9121 - Coastal lagoons', 9122: '9122 - Estuaries', 9211: '9211 - Permanent non-current inland waters (reservoirs, swamps, lakes,...)', 9212: '9212 - Permanent rivers and streams', 9213: '9213 - Other permanent inland waters', 9221: '9221 - Temporary non-current inland waters (reservoirs, swamps, lakes,...)', 9222: '9222 - Temporary rivers and streams', 9223: '9223 - Irrigation pools, ponds, fountains, wells', 9224: '9224 - Other temporary inland waters', 9311: '9311 - Open hydraulic infrastructures (ditches, canals...)'}
dict_list = []
for i, row in gdf.iterrows():
name = row[vector_field]
dict_freq = zonal_stats(row.geometry,
raster_file,
categorical=True,
category_map=cmap,
nodata=nodata_val)[0]
dict_freq['name'] = name
dict_list.append(dict_freq)
df = pd.DataFrame(dict_list).set_index('name')
df.to_csv(zonal_stat_csv)
print(f'CSV file was created : {zonal_stat_csv}')
df = df.transpose()
df = df.sort_values(by=df.columns[0], ascending=False)
display(df)
CSV file was created : /export/miro/ndeffense/LBRAT2104/GROUP_X/WORK/ZONAL_STATS/zonal_stat_cat.csv
| name | ROI_wallonia |
|---|---|
| 8111 - Urban | 186835 |
| 3199 - Grassland and meadows | 144570 |
| 6999 - Forest | 113601 |
| 1111 - Winter wheat | 49126 |
| 1121 - Maize | 13728 |
| 1152 - Barley six-row | 12005 |
| 1811 - Sugar beet | 11675 |
| 1511 - Potatoes | 6124 |
| 1771 - Peas | 6090 |
| 1923 - Flax, hemp and other similar crops | 5111 |
| 1192 - Other cereals | 4132 |
| 1171 - Oats | 2795 |
| 1435 - Rapeseed | 2484 |
| 4111 - Fallows 1 year | 1229 |
| 1911 - Alfalfa | 823 |
| 8411 - Greenhouses | 706 |
df['percent'] = ((df.iloc[:,0]/ df.iloc[:,0].sum())*100).round(2)
display(df)
| name | ROI_wallonia | percent |
|---|---|---|
| 8111 - Urban | 186835 | 33.30 |
| 3199 - Grassland and meadows | 144570 | 25.77 |
| 6999 - Forest | 113601 | 20.25 |
| 1111 - Winter wheat | 49126 | 8.76 |
| 1121 - Maize | 13728 | 2.45 |
| 1152 - Barley six-row | 12005 | 2.14 |
| 1811 - Sugar beet | 11675 | 2.08 |
| 1511 - Potatoes | 6124 | 1.09 |
| 1771 - Peas | 6090 | 1.09 |
| 1923 - Flax, hemp and other similar crops | 5111 | 0.91 |
| 1192 - Other cereals | 4132 | 0.74 |
| 1171 - Oats | 2795 | 0.50 |
| 1435 - Rapeseed | 2484 | 0.44 |
| 4111 - Fallows 1 year | 1229 | 0.22 |
| 1911 - Alfalfa | 823 | 0.15 |
| 8411 - Greenhouses | 706 | 0.13 |
fig = px.bar(df,
x=df.index.str.slice(start=0, stop=20),
y=df.columns[0],
text='percent',
labels={'x': "Crop category",df.columns[0]:"Number of pixels"})
#fig.update_xaxes(tickfont_size=15)
#fig.update_yaxes(title_font=dict(size=20), tickfont_size=15)
#fig.update_layout(font_size=20)
fig.update_layout(xaxis_title=None)
fig.show()
fig.write_html(zonal_stat_html, full_html=False)